# Remove every (non-hidden) object from the current workspace so the script
# starts from a clean state. NOTE(review): this also wipes anything the caller
# had defined before sourcing this file.
rm(list=ls())
#' 9017 Online Panels Benchmarking Study SRM Paper
#' Analysis syntax
#'
#' @author  Dina Neiger
#' @version 20220112
#'
#'
#' @input   
#' Analysis file from Step 1
#' Demographic variable names and labels
#' 
#' 
#' @output
#' 
#' File for significance testing of secondary demographic variables unweighted
#' Unweighted secondary demographics tables proportions in Table C2


# Ask the JVM for a maximum heap of 6096 MB ("-Xmx6096m").
# NOTE(review): presumably this has to stay ahead of library("xlsx") so the
# option is already set when the Java backend starts - confirm.
options(java.parameters = "-Xmx6096m")


library("plyr")
library("xlsx")
library(reshape2)
library("haven")

# Date formats applied by the xlsx package when writing date/datetime cells
options(xlsx.date.format="dd/MM/yyyy")
options(xlsx.datetime.format="dd/MM/yyyy")

# Windows stub: drive root under which the project folder lives
Z_PATH <- "Z:/"

# Project working directory. Built explicitly instead of being captured from
# the return value of setwd(), which is the *previous* working directory and
# was only correct because setwd() had been called twice with the same path.
WRK_DIR <- paste0(Z_PATH, "Research Papers and Presentations/SRM article/Submission syntax")
setwd(WRK_DIR)

INP_DIR <- paste0(WRK_DIR, "/Inputs/")
OUT_DIR <- paste0(WRK_DIR, "/Outputs/")

### read var names and labels for significance testing
# (the code below reads varnames_ds, so this file is expected to supply it)
load(paste0(INP_DIR, "dvarnames.RData"))

### read analysis file
# (the code below reads findata, so this file is expected to supply it)
load(paste0(OUT_DIR, "findata.RData"))




#' Build the numerator column for a (weighted) proportion
#'
#' @param varin     Vector of observed category values.
#' @param propcat   Category value(s) that count towards the proportion.
#' @param weightvar Weights, coerced to numeric.
#' @return Numeric vector holding the weight where `varin` is one of
#'   `propcat` and 0 everywhere else. A missing `varin` never matches, so it
#'   also yields 0.
wprop_prep <- function(varin, propcat, weightvar) {
  outside_category <- !(varin %in% propcat)
  replace(as.numeric(weightvar), outside_category, 0)
}


### derive substantive variables for significance testing
# Keep the survey identifiers and overwrite weight1 with 1, so every
# respondent counts equally: all figures derived below are unweighted.
dsigdata_unweighted <- findata[, c("surtype", "surtype.l", "weight1", "resp_id")]
dsigdata_unweighted$weight1 <- 1

# List any variable named in varnames_ds that is absent from findata
varnames_ds$varname[!varnames_ds$varname %in% names(findata)]

# One indicator column per (variable, category) pair, named
# "<varname>_<catname>w1": weight1 (= 1) when the respondent is in the
# category, 0 otherwise.
varnames <- varnames_ds$varname
catnames <- varnames_ds$catname
wvarnames <- paste0(varnames, "_", catnames, "w1")
# seq_along() skips the loop when there are no variables; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(varnames)) {
  # findata[, name] is kept on purpose: it raises an error for a column that
  # does not exist instead of silently producing a constant indicator.
  dsigdata_unweighted[wvarnames[[i]]] <-
    wprop_prep(findata[, varnames[[i]]], catnames[i], dsigdata_unweighted$weight1)
}
wvarnames
dsigdata_unweighted <- merge(
  dsigdata_unweighted,
  findata[, c("resp_id", "d16_base")],
  by = "resp_id",
  all.x = TRUE
)
# save the file for significance testing
save(dsigdata_unweighted, file = paste0(OUT_DIR, "S5-udemdata.RData"))
# paste0() has no `sep` argument, so the stray sep = "" has been dropped;
# the resulting path is unchanged.
write.xlsx2(
  dsigdata_unweighted,
  paste0(OUT_DIR, "S5-udemdata.xlsx"),
  row.names = FALSE,
  showNA = FALSE
)



# frequency tables (unweighted here: weight1 is set to 1 in dsigdata_unweighted)

#' Percentage of the weight total falling in one indicator column, by survey
#'
#' @param x    Data frame with columns `surtype.l`, `weight1` and `wvar`.
#' @param wvar Name of the indicator column to summarise.
#' @return The result of `ddply()` over `surtype.l`: one row per group with
#'   100 * sum(wvar) / sum(weight1), rounded to 2 decimals. Rows where `wvar`
#'   is NA are dropped before grouping.
check_prop <- function(x, wvar) {
  complete_rows <- x[!is.na(x[, wvar]), ]
  group_pct <- function(grp) {
    round(100 * sum(grp[, wvar]) / sum(grp$weight1), 2)
  }
  ddply(complete_rows, .(surtype.l), group_pct)
}
x <- check_prop(dsigdata_unweighted, "atsi_29w1")

# Table C1 ATSI unweighted estimate
x

# Unweighted results for demographic variables (weight1 is 1 for every row
# of dsigdata_unweighted): one "e_<indicator>" column per indicator, one row
# per surtype.l.
z <- NULL
checkds <- NULL
# seq_along() skips the loop when there are no variables; 1:length() would
# iterate over c(1, 0) instead.
for (i in seq_along(varnames)) {
  z <- check_prop(dsigdata_unweighted, wvarnames[[i]])
  names(z) <- c("surtype.l", paste0("e_", wvarnames[[i]]))
  # The first table seeds the result; later ones are joined on surtype.l
  checkds <- if (is.null(checkds)) z else merge(checkds, z, by = "surtype.l")
}

# Print with indicators as rows; nothing to show if no variable was processed
if (!is.null(checkds)) t(checkds)





